All packages used for the analysis and figures on this page:

library(tidyverse)
library(plotly)
library(DT)
library(glmnet)
library(caret)
library(colorRamps)
library(RColorBrewer)
library(colorspace)
library(readxl)
# remotes::install_github("LCBC-UiO/ggseg")
# 
# If that doesn't work: 
# 
# download.file("https://github.com/LCBC-UiO/ggseg/archive/master.zip", "ggseg.zip")
# unzip("ggseg.zip")
# devtools::install_local("ggseg-master")
library(ggseg)

Load in the prepared data:

# Load the prepared data objects into the global environment; path is
# relative to this document. Presumably this provides `annual.changes` and
# `post.pca`, which are used below but defined nowhere in this file —
# TODO confirm against the preprocessing script that created the .RData.
load("../Prepared_Data.RData")

I’ll split the data into 75% training and 25% test. I’ll use the same rows for the original tau-PET ROI data as well as the PCA-transformed data for comparison.

set.seed(127)

# Sample 75% of the rows (without replacement) as the training index.
# floor() makes the size an explicit integer when nrow() is not divisible
# by 4, and TRUE/FALSE is used instead of the reassignable shorthand F.
train.index <- sample(nrow(annual.changes),
                      floor(nrow(annual.changes) * 0.75),
                      replace = FALSE)

# Original tau-PET ROI data: outcome (CDRSB) plus the ROI columns only.
original <- annual.changes %>% ungroup() %>%
  select(-RID, -interval_num) %>%
  select(CDRSB, amygdala:transversetemporal)

# PCA-transformed version of the same observations.
pca <- post.pca %>% ungroup() %>% select(-RID, -interval_num)

# The same rows go into training/test for both representations so that
# model comparisons are apples-to-apples.
original.train <- original[train.index, ]
original.test <- original[-train.index, ]

pca.train <- pca[train.index, ]
pca.test <- pca[-train.index, ]

Individual models

Elastic Net Regression

set.seed(127)

# 10-fold cross-validation for hyperparameter tuning.
myControl <- trainControl(method = "cv", number = 10)

# Tuning grid: three mixing parameters (0 = ridge, 0.5 = elastic net,
# 1 = lasso) crossed with 100 penalty strengths from 0.0001 to 1.
tune.grid <- expand.grid(
  alpha = c(0, 0.5, 1),
  lambda = seq(0.0001, 1, length = 100)
)

# Fit a cross-validated glmnet model predicting CDRSB from all ROI columns.
regularized.model <- train(
  CDRSB ~ .,
  data = original.train,
  method = "glmnet",
  tuneGrid = tune.grid,
  trControl = myControl
)

What are the optimal values for alpha and lambda obtained via cross-validation?

# Hyperparameters selected by 10-fold CV: alpha = 0 (pure ridge) with
# lambda ~ 0.121, per the captured output below.
regularized.model$bestTune
##    alpha lambda
## 13     0 0.1213

What is the importance and coefficient for each of the 31 ROIs?

# Variable importance per ROI; varImp() returns a data frame with ROI names
# as row names, so promote them to a column for joining.
roi_importance <- varImp(regularized.model)[[1]] %>%
  rownames_to_column(var = "ROI") %>%
  dplyr::rename("Importance" = "Overall")

# Coefficients at the CV-selected lambda; coef() returns a sparse matrix,
# so densify it before converting to a data frame.
roi_coef <- as.data.frame(as.matrix(coef(regularized.model$finalModel,
                                         regularized.model$bestTune$lambda))) %>%
  rownames_to_column(var = "ROI") %>%
  dplyr::rename("Coefficient" = "1")

# Join explicitly on ROI instead of relying on left_join()'s implicit
# natural join (silences the "Joining, by = ..." message and guards against
# accidental extra shared columns).
regularized_roi_results <- left_join(roi_importance, roi_coef, by = "ROI")
datatable(regularized_roi_results)

# Summary statistics used below to z-score importance and coefficients.
mean_import <- mean(regularized_roi_results$Importance)
sd_import <- sd(regularized_roi_results$Importance)
mean_coef <- mean(regularized_roi_results$Coefficient)
sd_coef <- sd(regularized_roi_results$Coefficient)

# Parallel-coordinates style plot: each ROI is a line connecting its z-scored
# importance to its z-scored coefficient, colored by coefficient.
# mean_*/sd_* are scalars, so mutate() is fully vectorized; the original
# rowwise() call added per-row overhead without changing the result.
p_import_coef <- regularized_roi_results %>%
  mutate(Importance = (Importance - mean_import) / sd_import,
         Coefficient = (Coefficient - mean_coef) / sd_coef,
         Color = Coefficient) %>%
  pivot_longer(cols = c(-ROI, -Color)) %>%
  ggplot(data = ., mapping = aes(x = name, y = value, label = ROI, group = ROI)) +
  geom_line(aes(color = Color)) +
  scale_color_continuous_divergingx(palette = 'RdBu', mid = 0, rev = TRUE) +
  theme_minimal() +
  labs(color = "Z-Score\n(Coefficient)") +
  ylab("Z-Score") +
  xlab("Feature") +
  ggtitle("ROI Coefficient vs. Feature") +
  theme(plot.title = element_text(hjust = 0.5))

# Interactive version; the tooltip shows only the ROI name.
ggplotly(p_import_coef, tooltip = c("label"))
# Lookup tables mapping tau-PET ROI names to ggseg atlas region names
# (sheet 1: cortical aparc, sheet 2: subcortical aseg). Braak stage is
# stored as a Roman numeral in the spreadsheet, so convert it to numeric.
ggseg.aparc <- read_excel("../ggseg_roi.xlsx", sheet = 1) %>%
  mutate(Braak = as.numeric(as.roman(Braak)))

ggseg.aseg <- read_excel("../ggseg_roi.xlsx", sheet = 2) %>%
  mutate(Braak = as.numeric(as.roman(Braak)))

# Sequential red palette (100 steps) for later figures.
pal1 <- colorRampPalette(brewer.pal(9, "Reds"))(100)

# NOTE(review): mean_import/sd_import/mean_coef/sd_coef were recomputed here
# verbatim; the identical definitions made just after the elastic-net fit
# are reused instead.


# Brain map of z-scored ROI importance on the Desikan-Killiany atlas.
# mean_import/sd_import are scalars, so the mutate() is vectorized and the
# original rowwise() was unnecessary; rev = TRUE replaces the shorthand T.
p1 <- regularized_roi_results %>%
  left_join(., ggseg.aparc, by = c("ROI" = "tau_ROI")) %>%
  select(Importance, ggseg_ROI) %>%
  mutate(Importance = (Importance - mean_import) / sd_import) %>%
  dplyr::rename("region" = "ggseg_ROI") %>%
  filter(!is.na(region)) %>%  # drop ROIs with no ggseg atlas mapping
  ggseg(atlas = "dk", mapping = aes(fill = Importance, label = region)) +
  ggtitle("Cross-Validated Elastic Net ROI Coefficients + Importance") +
  # Fixed limits so this panel's color scale is comparable across renders.
  scale_fill_continuous_divergingx(palette = 'RdBu', mid = 0, rev = TRUE,
                                   limits = c(-3, 3)) +
  ylab("Importance") +
  labs(fill = "Z-score") +
  theme(axis.text = element_blank(),
        plot.title = element_text(hjust = 0.5, family = "calibri"),
        axis.title.x = element_blank(),
        axis.title.y = element_text(family = "calibri"),
        legend.title = element_text(family = "calibri"),
        legend.text = element_text(family = "calibri"),
        text = element_text(family = "calibri"))

# Companion brain map of z-scored ROI coefficients; shares its legend with
# p1 above (legend.position = "none"). As with p1, the scalar mean/sd make
# rowwise() unnecessary, and TRUE replaces the shorthand T.
p2 <- regularized_roi_results %>%
  left_join(., ggseg.aparc, by = c("ROI" = "tau_ROI")) %>%
  select(Coefficient, ggseg_ROI) %>%
  mutate(Coefficient = (Coefficient - mean_coef) / sd_coef) %>%
  dplyr::rename("region" = "ggseg_ROI") %>%
  filter(!is.na(region)) %>%  # drop ROIs with no ggseg atlas mapping
  ggseg(atlas = "dk", mapping = aes(fill = Coefficient, label = region)) +
  # Hemisphere labels placed in plot coordinates.
  annotate("text", x = 400, y = -40, label = "left", family = "calibri") +
  annotate("text", x = 1300, y = -40, label = "right", family = "calibri") +
  scale_fill_continuous_divergingx(palette = 'RdBu', mid = 0, rev = TRUE) +
  ylab("Coefficient") +
  theme(axis.text = element_blank(),
        plot.title = element_text(hjust = 0.5, family = "calibri"),
        axis.title.x = element_text(family = "calibri"),
        axis.title.y = element_text(angle = 0, family = "calibri"),
        legend.position = "none",
        text = element_text(family = "calibri"))


# Convert both brain maps to interactive plotly widgets and stack them
# vertically (importance on top, coefficients below), keeping axis titles.
# TRUE replaces the reassignable shorthand T throughout.
ggp1 <- ggplotly(p1, dynamicTicks = TRUE, tooltip = c("label", "fill"),
                 width = 800, height = 400)
ggp2 <- ggplotly(p2, dynamicTicks = TRUE, tooltip = c("label", "fill"),
                 width = 800, height = 400)

subplot(ggp1, ggp2, nrows = 2, titleX = TRUE, titleY = TRUE)

kNN and neural net

Original data

PCA-transformed data

Elastic net ×3, kNN ×3, neural net ×3

Compare accuracy